





import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme, then switch to the "whitegrid" style while
# overriding the sans-serif font list. 'simhei' is listed first, presumably
# so the Chinese tick labels used below can be rendered -- confirm the font
# is installed on the target machine.
sns.set()
font_overrides = {'font.sans-serif': ['simhei', 'Arial']}
sns.set_style('whitegrid', font_overrides)





# Demo data: one uniform random value per (row label, column label) pair.
# Rows are Beijing sights, columns are the days of the week.
row_labels = ["天安门", "故宫", "奥林匹克森林公园", "八达岭长城"]
col_labels = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
df = pd.DataFrame(
    np.random.rand(len(row_labels), len(col_labels)),
    index=row_labels,
    columns=col_labels,
)
df


# Draw the frame as a heatmap on a 10x4 figure, writing each cell's value
# (two decimals) inside the cell.
plt.figure(figsize=(10, 4))
sns.heatmap(df, annot=True, fmt=".2f", cmap="coolwarm")





# Read the Titanic train and test CSV files and stack them row-wise into a
# single frame.
titanic_parts = [
    pd.read_csv(csv_path)
    for csv_path in (
        "./datas/titanic/titanic_train.csv",
        "./datas/titanic/titanic_test.csv",
    )
]
# NOTE: concat is called with its default index handling, so each part keeps
# its own row labels and labels may repeat in the combined frame.
df = pd.concat(titanic_parts)


df.head()


df.info()


# Use pandas to turn the string-typed columns into integer category codes.
# NOTE: `.cat.codes` assigns -1 to missing values, so rows with no value in
# a column all share the code -1 after this step.
categorical_fields = ("Sex", "Cabin", "Embarked")
for field in categorical_fields:
    as_category = df[field].astype("category")
    df[field] = as_category.cat.codes


df.info()


df.head(3)


# Pairwise correlation coefficients between the numeric columns. The matrix
# is computed once and reused for both the table view and the heatmap rather
# than being recomputed for the plot.
corr_matrix = df.corr(numeric_only=True)
corr_matrix


# Draw the correlation matrix as a heatmap on a 12x6 figure, writing each
# coefficient (two decimals) inside its cell.
plt.figure(figsize=(12, 6))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm")



